# Thesis Files/MainQLearningRNN_OptStratGeneratorThesis.R

library(ReinforcementLearningwithR)
library(keras)        # load_model_hdf5() to restore saved models
library(restorepoint) # restore.point(), disable.restore.points()
library(StratTourn)   # make.pd.game(), init.tournament(), run.tournament()
require(compiler)     # enableJIT()
library(tictoc)

# Strategy the RNN is trained against; get.antistrat() maps it to its
# corresponding anti-strategy used in the evaluation tournament
strat <- "TikTak1"
antistrat <- get.antistrat()[strat]

set.seed(123456)
file.name <- paste0("opt.run.",strat,".",Sys.Date(),".RNN")
continue <- FALSE

a.MC <- c(0.1)          # learning rate of the Monte-Carlo component
a.Q <- c(0.1)           # learning rate of the Q-learning component
hybrid.decay <- c(0.98) # decay factor of the Monte-Carlo weight (hybrid learning)
block.no <- 500  # number of blocks to play
eval.no <- 1000  # number of evaluation matches at the end
rounds.no <- 60  # number of periods per game

generate.best.strat <- function(strat, antistrat, a.MC, a.Q, hybrid.decay, block.no, eval.no, rounds.no, continue=FALSE, file.name){
  restore.point("generate.best.strat")
  my.strat <- strat

  # Minimal time-series encoding of the game state; the corresponding RNN model
  # evaluates the strategy
  encoding.state <- c("TimeSeries.minimal")
  eval.strat <- Model.strat.RNN.TimeSeries.minimal

  game.object <- Get.Game.Object.PD(encoding.state = encoding.state, eval.strategy = eval.strat, encoding.params=NULL)

  game.object$game.pars$T <- rounds.no
  game.object$game.pars$T.max <- rounds.no
  game.object$game.pars$delta <- 0.95 # discount factor of the repeated game
  game.object$game.pars$other.strategies <- c(get(my.strat))
  names(game.object$game.pars$other.strategies) <- my.strat
  game.object$game.pars$err.D.prob <- 0.15 # prob. that a played C is observed as D
  game.object$game.pars$err.C.prob <- 0.0  # prob. that a played D is observed as C
  assign("game.object",game.object,envir=.GlobalEnv)

  algo.par <- Get.Def.Par.QLearningPersExpPath()

  algo.par$action.policy <- "exploration.path" #May be 'exploration.path' or 'epsilon.greedy'

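  # PID-controlled exploration path: for each exploration signal (var, shock,
  # surp, fam) a proportional (Kp), integral (Ki) and derivative (Kd) term steers
  # exploration towards its target path. The .db parameters set how many stored
  # observations each term uses; the .disc parameters discount older observations.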
  algo.par$expl.path.multi.start <- 0.85
  algo.par$expl.path.multi.end <- 0.95
  algo.par$expl.path.multi.decay.type <- "linear"
  algo.par$expl.path.multi.best.db <- 100
  algo.par$expl.path.multi.best.disc <- 0.98
  algo.par$expl.path.multi.Kp.var <- 0.005 # proportional gain
  algo.par$expl.path.multi.Ki.var <- 0.00001 # integral gain
  algo.par$expl.path.multi.Kd.var <- 0.000005 # derivative gain
  algo.par$expl.path.multi.Kp.shock <- 0.005 # proportional gain
  algo.par$expl.path.multi.Ki.shock <- 0.00001 # integral gain
  algo.par$expl.path.multi.Kd.shock <- 0.000005 # derivative gain
  algo.par$expl.path.multi.Kp.surp <- 0.5 # proportional gain
  algo.par$expl.path.multi.Ki.surp <- 0.2 # integral gain
  algo.par$expl.path.multi.Kd.surp <- 0.05 # derivative gain
  algo.par$expl.path.multi.Kp.fam <- 0.5 # proportional gain
  algo.par$expl.path.multi.Ki.fam <- 0.02 # integral gain
  algo.par$expl.path.multi.Kd.fam <- 0.01 # derivative gain
  algo.par$expl.path.multi.Kp.db <- 5 # number of past observations used for the proportional term
  algo.par$expl.path.multi.Ki.db <- 100 # number of past observations used for the integral term
  algo.par$expl.path.multi.Kd.db <- 5 # number of past observations used for the derivative term
  algo.par$expl.path.multi.Kp.disc <- 0.95 # discount factor weighting past observations in the proportional term
  algo.par$expl.path.multi.Ki.disc <- 0.99 # discount factor weighting past observations in the integral term
  algo.par$expl.path.multi.Kd.disc <- 0.95 # discount factor weighting past observations in the derivative term

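  # Starting levels of the exploration signals and how the exploration budget is
  # split across them at the start and end of training; here only var and shock
  # are active, with half the weight each.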
  algo.par$expl.path.multi.start.var <- 0.1
  algo.par$expl.path.multi.start.shock <- 0.1
  algo.par$expl.path.multi.start.surp <- 0
  algo.par$expl.path.multi.start.fam <- 0

  algo.par$expl.path.multi.start.frac.var <- 0.5
  algo.par$expl.path.multi.start.frac.shock <- 0.5
  algo.par$expl.path.multi.start.frac.surp <- 0
  algo.par$expl.path.multi.start.frac.fam <- 0
  algo.par$expl.path.multi.end.frac.var <- 0.5
  algo.par$expl.path.multi.end.frac.shock <- 0.5
  algo.par$expl.path.multi.end.frac.surp <- 0
  algo.par$expl.path.multi.end.frac.fam <- 0

  algo.par$gamma <- game.object$game.pars$delta # learner's discount factor matches the game
  algo.par$a <- a.Q
  algo.par$replay.intensive <- 1
  algo.par$curio.beta <- 0 # curiosity bonus switched off

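  # Episode mix per training block: 1 episode following the current model, 1
  # following the best model so far, and 2 multi-signal exploration episodes;
  # the single-signal exploration episode types are disabled.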
  algo.par$block.curr <- 1
  algo.par$block.best <- 1
  algo.par$block.expl.var <- 0
  algo.par$block.expl.shock <- 0
  algo.par$block.expl.surp <- 0
  algo.par$block.expl.fam <- 0
  algo.par$block.expl.vs <- 0
  algo.par$block.expl.multi <- 2


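  # Each block contributes block.length observations (episodes per block times
  # periods per game). Replay settings scale with it: the most recent block is
  # always part of training (force.last), each update samples 5 blocks
  # (batch.size) and the memory holds at most 100 blocks (max.mem).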
  block.length <- (algo.par$block.curr + algo.par$block.best +
                   algo.par$block.expl.var + algo.par$block.expl.shock +
                   algo.par$block.expl.surp + algo.par$block.expl.fam +
                   algo.par$block.expl.vs + algo.par$block.expl.multi) *
                  game.object$game.pars$T
  algo.par$force.last <- block.length*1
  algo.par$batch.size <- block.length*5
  algo.par$max.mem <- block.length*100


  blocks <- block.no # number of training blocks (earlier runs used 250 resp. 1000)
  algo.par$hybrid.Q.a.MC <- a.MC
  algo.par$hybrid.Q.apply <- "always" #also used for MC
  algo.par$hybrid.decay <- hybrid.decay
  algo.par$only.experienced <- TRUE
  algo.par$use.rnn <- TRUE

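  # Hybrid of Monte-Carlo and Q-learning updates: hybrid.Q blends MC returns
  # (learning rate a.MC) with one-step Q-learning targets (learning rate a.Q);
  # hybrid.decay phases the MC weight out over training and hybrid.switch
  # enables that transition.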
  algo.par$hybrid.Q <- TRUE
  algo.par$Q.Learning <- FALSE
  algo.par$MC <- FALSE
  algo.par$hybrid.switch <- TRUE

  model.par <- Get.Def.Par.RNN()
  model.par$hidden.nodes <- c(128,64) # one LSTM layer with 128 units, one dense layer with 64 units
  model.par$layer.type <- c("lstm","dense")
  model.par$activation.hidden <- c("sigmoid","sigmoid")
  model.par$dropout <- c(0,0) # dropout rates for the hidden layers; 0 deactivates
  model.par$recurrent.dropout <- c(0) # dropout within the recurrent layer; 0 deactivates
  model.par$input.dropout <- c(0) # dropout on the input layer; NULL or 0 deactivates

  model.par$epochs <- 5
  model.par$single.dimensional <- TRUE
  model.par$batch.size.train <- 600
  model.par$give.up.precision <- 10



  if(continue){
    # Resume a previous run. Keras models do not survive R serialisation: a
    # reloaded evaluator holds a null external pointer ("<pointer: 0x0>"), so the
    # models are restored from their HDF5 checkpoint files.
    load(paste0(file.name,".tmp"))
    if(as.character(evaluator$model.best)=="<pointer: 0x0>"){
      if(!is.null(evaluator$model.best.out.file)){
        evaluator$model.best <- load_model_hdf5(filepath=evaluator$model.best.out.file)
      } else {
        stop("model.best.out.file not defined.")
      }
    }
    if(as.character(evaluator$model.cur)=="<pointer: 0x0>"){
      if(!is.null(evaluator$model.cur.out.file)){
        evaluator$model.cur <- load_model_hdf5(filepath=evaluator$model.cur.out.file)
      } else {
        stop("model.cur.out.file not defined.")
      }
    }
  } else {
    # Fresh run: set up the evaluator (models) and initialise the algorithm state
    # with a starting memory of 100 self-play episodes.
    evaluator <- Setup.QLearningPersExpPath(game.object, algo.par=algo.par, model.par=model.par)
    algo.var <- Initialise.QLearningPersExpPath(game.object, algo.par, memory.init="self.play", memory.param=list(no=100), model.par=model.par)
  }

  res <- Train.QLearningPersExpPath(evaluator=evaluator, model.par=model.par, algo.par=algo.par,
                                    algo.var=algo.var, model.par.surp=NULL, model.par.fam=NULL,
                                    game.object=game.object, blocks=blocks, eval.only=FALSE,
                                    start.w.training=TRUE, out.file=paste0(file.name,".tmp"))

  # Copy the trained state out of the result object
  evaluator <- res$evaluator
  for(fld in c("memory","analysis","epsilon",
               "path.goal.var","path.goal.shock","path.goal.surp","path.goal.fam","path.goal.multi",
               "expl.path.var","expl.path.shock","expl.path.surp","expl.path.fam","expl.path.multi")){
    algo.var[[fld]] <- res$algo.var[[fld]]
  }

  # Save memory & model before the evaluation tournaments
  idio.name <- paste0("opt.run.RNN.full.",my.strat)
  file.name <- paste(idio.name, format(Sys.time(), "%d-%b-%Y %H.%M"), "before.StratTourn", sep=" ")
  save(evaluator, algo.var, algo.par, game.object, model.par, file=file.name)

  # Init evaluation game via StratTourn with the same noise and discounting as training
  game = make.pd.game(err.D.prob=0.15, delta=0.95)

  # nlist() names entries after the unevaluated expressions, so the payoff
  # matrices below are indexed by "Model.strat.RNN.TimeSeries.minimal" and "get(my.strat)"
  strat = nlist(Model.strat.RNN.TimeSeries.minimal, get(my.strat), get(antistrat))

  # Tournament 1: no explicit round limit passed to run.tournament()
  tourn.no.limit = init.tournament(game=game, strat=strat, game.seeds=234567)
  set.seed(234567)
  tourn.no.limit = run.tournament(tourn=tourn.no.limit, R = eval.no)
  disable.restore.points(FALSE)
  restore.point("after first tourn")
  r.no.limit <- get.matches.vs.matrix(tourn.no.limit$dt)["Model.strat.RNN.TimeSeries.minimal","get(my.strat)"]

  # Tournament 2: capped at rounds.no periods, matching the training horizon
  tourn = init.tournament(game=game, strat=strat)
  set.seed(234567)
  tourn = run.tournament(tourn=tourn, R = eval.no, T.max=rounds.no)
  r.limit <- get.matches.vs.matrix(tourn$dt)["Model.strat.RNN.TimeSeries.minimal","get(my.strat)"]

  file.name <- paste(idio.name, format(Sys.time(), "%d-%b-%Y %H.%M"), sep=" ")

  #Save Memory & model
  save(evaluator, algo.var, algo.par, game.object, model.par, r.no.limit, r.limit, tourn, tourn.no.limit, file=file.name)

  show.tournament(tourn)
  # Return the payoffs of the learned strategy against its training opponent
  return(list(r.no.limit=r.no.limit, r.limit=r.limit))
}
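
# A minimal sketch (file name hypothetical) of how a finished run could be inspected:
#   load("opt.run.RNN.full.TikTak1 10-Nov-2019 05.24") # restores evaluator, algo.var, r.limit, tourn, ...
#   r.limit                 # payoff of the RNN strategy vs. its opponent in the capped tournament
#   show.tournament(tourn)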

disable.restore.points(TRUE)
enableJIT(3)
generate.best.strat(strat=strat, antistrat=antistrat, a.MC=a.MC, a.Q=a.Q, hybrid.decay=hybrid.decay, block.no=block.no, eval.no=eval.no, rounds.no=rounds.no, continue=continue, file.name=file.name)